# -*- coding: utf-8 -*-
import scrapy
from school.items import SchoolItem

class SiyuanSpider(scrapy.Spider):
    """Spider for xasyu.cn that crawls the tab navigation menu.

    Requirement (translated from the original): fetch the content shown to the
    right of each navigation tab, restricting requests to sub-routes of the
    allowed domain.
    """
    name = 'siyuan'
    # allowed_domains must contain bare domain names, not URLs — Scrapy's
    # OffsiteMiddleware rejects scheme/path components ("allowed_domains
    # accepts only domains, not URLs") and may filter follow-up requests.
    allowed_domains = ['www.xasyu.cn']
    start_urls = ['http://www.xasyu.cn/web/?path=xyjs&typeid=xyjs']

    def parse(self, response):
        """Collect relative links from the tab menu and follow each one.

        Yields a Request per relative menu link, handled by detail_parse.
        Absolute ("http...") links are skipped as off-menu/external.
        """
        path = 'http://www.xasyu.cn/web/'
        menu_links = response.css('.dh>ul>li>a')

        for tag in menu_links:
            url = tag.css('::attr(href)')[0].extract()

            # Only follow relative links; absolute URLs point outside the menu.
            if not url.startswith('http'):
                item = SchoolItem()
                item['url'] = path + url
                # dont_filter: menu entries may repeat; crawl each occurrence.
                yield response.follow(url=item['url'],
                                      callback=self.detail_parse,
                                      dont_filter=True)

    def detail_parse(self, response):
        """Extract and print the content pane to the right of the tab.

        NOTE(review): result is only printed, never yielded as an item —
        presumably debug-stage code; confirm before relying on output.
        """
        text = response.css('body > div:nth-child(3) > div:nth-child(2)')[0].extract()
        print(text)
